library(tidyverse)
library(tidymodels)
library(lubridate)
library(magrittr)
library(plotly) # interactive ggplot
library(here) # dir 
library(parallel) # multi-processing
library(knitr)
here()
## [1] "C:/Users/rhyeu/Documents/GitHub/kaggle-study"

Walmart in Tidymodels

Walmart mothership Data

# Read the Kaggle Walmart data (read_csv reads the .zip archives
# directly) and normalise column names to snake_case with janitor
# (Store -> store, IsHoliday -> is_holiday, ...).
## train columns: Store, Dept, Date, Weekly_Sales, IsHoliday
## test columns:  Store, Dept, Date, IsHoliday (no target)
train <- read_csv(here("walmart/train.csv.zip")) %>%
    janitor::clean_names()
test <- read_csv(here("walmart/test.csv.zip")) %>%
    janitor::clean_names()
# Weekly sales over time; stored in `p` so ggplotly() can render it
# interactively. Use left assignment (`<-`) rather than the
# right-assignment `->` form, per the tidyverse style guide.
p <- train %>% 
    select(date, weekly_sales) %>% 
    ggplot(aes(x = date, y = weekly_sales)) + 
    geom_line()

ggplotly(p)

# Single 70/30 validation split, stratified on the holiday flag so
# both partitions keep the same holiday/non-holiday mix.
set.seed(1234)
# walmart_folds <- vfold_cv(train, v = 5, strata = is_holiday)

walmart_validation <- validation_split(train,
                                       strata = is_holiday,
                                       prop = 0.7)

Preprocess interface in tidymodels

  • recipe
    • step
      • step_corr()
      • step_center
      • step_scale
    • prep
  • workflow

recipe

# Preprocessing recipe: derive the month from `date`, then drop the
# raw date column. The empty step_mutate() is kept as a placeholder
# for the commented-out factor conversions tried earlier.
walmart_recipe <- recipe(weekly_sales ~ ., data = train) %>%
    step_date(date, features = c("month")) %>%
    step_rm(date) %>%
    step_mutate(
             # store = as.factor(store),
             # dept = as.factor(dept)
            # , date_year = as.factor(date_year)
            ) %>%
    # step_dummy(all_nominal(), -all_outcomes()) %>%
    prep()

walmart_recipe %>% print()
## Data Recipe
## 
## Inputs:
## 
##       role #variables
##    outcome          1
##  predictor          4
## 
## Training data contained 421570 data points and no missing data.
## 
## Operations:
## 
## Date features from date [trained]
## Variables removed date [trained]
## Variable mutation for  [trained]
# Extract the preprocessed training data from the trained recipe.
train_final <- juice(walmart_recipe)

# train_final %>% colnames()
kable(head(train_final))
store dept is_holiday weekly_sales date_month
1 1 FALSE 24924.50 2
1 1 TRUE 46039.49 2
1 1 FALSE 41595.55 2
1 1 FALSE 19403.54 2
1 1 FALSE 21827.90 3
1 1 FALSE 21043.39 3
# Apply the same trained recipe to the test set (no re-estimation).
walmart_recipe %>%
    bake(new_data = test) %>%
    # colnames()
    head() %>%
    kable()
store dept is_holiday date_month
1 1 FALSE 11
1 1 FALSE 11
1 1 FALSE 11
1 1 TRUE 11
1 1 FALSE 11
1 1 FALSE 12

Random forest workflow

cores <- parallel::detectCores() -1
# Random forest (ranger engine) with mtry and min_n left for tuning;
# 1000 trees, fixed seed, multi-threaded.
rf_model <- rand_forest(
        mtry  = tune(),
        min_n = tune(),
        trees = 1000
    ) %>%
    set_engine("ranger", seed = 1234, num.threads = cores) %>%
    set_mode("regression")

# Bundle the model and the preprocessing recipe into one workflow.
walmart_wflow <- workflow() %>%
    add_model(rf_model) %>%
    add_recipe(walmart_recipe)

walmart_wflow
## == Workflow ====================================================================
## Preprocessor: Recipe
## Model: rand_forest()
## 
## -- Preprocessor ----------------------------------------------------------------
## 3 Recipe Steps
## 
## * step_date()
## * step_rm()
## * step_mutate()
## 
## -- Model -----------------------------------------------------------------------
## Random Forest Model Specification (regression)
## 
## Main Arguments:
##   mtry = tune()
##   trees = 1000
##   min_n = tune()
## 
## Engine-Specific Arguments:
##   seed = 1234
##   num.threads = cores
## 
## Computational engine: ranger

hyperparameter tuning

# Tune mtry / min_n over 5 auto-generated candidates on the
# validation split, scoring with RMSE and keeping held-out
# predictions for later inspection.
set.seed(1234)

rf_result <- tune_grid(
    walmart_wflow,
    resamples = walmart_validation,
    grid = 5,
    control = control_grid(save_pred = TRUE),
    metrics = metric_set(rmse)
)
## i Creating pre-processing data to finalize unknown parameter: mtry
## 
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
## 
##     set_names
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## 
## Attaching package: 'vctrs'
## The following object is masked from 'package:dplyr':
## 
##     data_frame
## The following object is masked from 'package:tibble':
## 
##     data_frame
rf_result %>% show_best()
## # A tibble: 5 x 8
##    mtry min_n .metric .estimator   mean     n std_err .config             
##   <int> <int> <chr>   <chr>       <dbl> <int>   <dbl> <chr>               
## 1     4    14 rmse    standard    4736.     1      NA Preprocessor1_Model3
## 2     3    33 rmse    standard    5215.     1      NA Preprocessor1_Model5
## 3     2     6 rmse    standard    7586.     1      NA Preprocessor1_Model2
## 4     2    25 rmse    standard    7866.     1      NA Preprocessor1_Model1
## 5     1    21 rmse    standard   18690.     1      NA Preprocessor1_Model4
# Keep the hyperparameter combination with the lowest validation RMSE.
rf_best <- select_best(rf_result, metric = "rmse")

rf_best
## # A tibble: 1 x 3
##    mtry min_n .config             
##   <int> <int> <chr>               
## 1     4    14 Preprocessor1_Model3

best fit with tuned hyperparameters

### the last model
# Final model: fix mtry / min_n at the tuned values and request
# impurity-based variable importance from ranger.
rf_best_model <- rand_forest(
        mtry  = rf_best$mtry,
        min_n = rf_best$min_n,
        trees = 1000
    ) %>%
    set_engine("ranger", seed = 1234,
               num.threads = cores,
               importance = "impurity") %>%
    set_mode("regression")


# Swap the tuned spec into the existing workflow, then refit on the
# full training data.
final_rf_wflow <- update_model(walmart_wflow, rf_best_model)

set.seed(1234)
rf_best_fit <- fit(final_rf_wflow, train)
rf_best_fit
## == Workflow [trained] ==========================================================
## Preprocessor: Recipe
## Model: rand_forest()
## 
## -- Preprocessor ----------------------------------------------------------------
## 3 Recipe Steps
## 
## * step_date()
## * step_rm()
## * step_mutate()
## 
## -- Model -----------------------------------------------------------------------
## Ranger result
## 
## Call:
##  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~rf_best$mtry,      x), num.trees = ~1000, min.node.size = min_rows(~rf_best$min_n,      x), seed = ~1234, num.threads = ~cores, importance = ~"impurity",      verbose = FALSE) 
## 
## Type:                             Regression 
## Number of trees:                  1000 
## Sample size:                      421570 
## Number of independent variables:  4 
## Mtry:                             4 
## Target node size:                 14 
## Variable importance mode:         impurity 
## Splitrule:                        variance 
## OOB prediction error (MSE):       22231433 
## R squared (OOB):                  0.9568989

submit prediction

# Load the sample submission (columns: Id, Weekly_Sales) and replace
# the target column with our predictions.
subfile <- read_csv(here("walmart/sampleSubmission.csv.zip"))

# predict() returns a one-column tibble (.pred); pull() extracts it
# as a plain numeric vector — clearer than select() + unlist(), and
# avoids the ".pred1, .pred2, ..." names unlist() attaches.
subfile$Weekly_Sales <- rf_best_fit %>% 
    predict(test) %>% 
    pull(.pred)


subfile
## # A tibble: 115,064 x 2
##    Id             Weekly_Sales
##    <chr>                 <dbl>
##  1 1_1_2012-11-02       24956.
##  2 1_1_2012-11-09       24956.
##  3 1_1_2012-11-16       24956.
##  4 1_1_2012-11-23       21499.
##  5 1_1_2012-11-30       24956.
##  6 1_1_2012-12-07       40685.
##  7 1_1_2012-12-14       40685.
##  8 1_1_2012-12-21       40685.
##  9 1_1_2012-12-28       24224.
## 10 1_1_2013-01-04       18222.
## # ... with 115,054 more rows
# Write the submission file, then compare this week's leaderboard
# scores (private / public) against last week's.
write.csv(subfile,
          here("walmart/tuning-rf.csv"),
          row.names = FALSE)

last_week <- c(20660.01047, 20238.71579)
today <- c(3672.12956, 3536.56464)

scores <- rbind(last_week, today)
colnames(scores) <- c("private", "public")
scores
##            private    public
## last_week 20660.01 20238.716
## today      3672.13  3536.565
  • last week’s score : 20660.01047 / 20238.71579
  • today’s score : 3672.12956 / 3536.56464

Attachment 1: CV with a decision tree in tidymodels

# 70/30 train/validation split, stratified on the holiday flag;
# train_data / val_data feed the decision-tree and lm attachments.
set.seed(1234)
walmart_split <- initial_split(train, prop = 0.7, strata = is_holiday)

walmart_split
train_data <- training(walmart_split)
val_data <- testing(walmart_split)

# Decision tree (rpart) with both pruning parameters left for tuning.
tune_spec <- decision_tree(
        cost_complexity = tune(),
        tree_depth = tune()
    ) %>%
    set_engine("rpart") %>%
    set_mode("regression")


tune_spec
## Decision Tree Model Specification (regression)
## 
## Main Arguments:
##   cost_complexity = tune()
##   tree_depth = tune()
## 
## Computational engine: rpart
# Regular grid: 3 levels per parameter -> 9 candidate
# (cost_complexity, tree_depth) combinations.
tree_grid <- grid_regular(
    cost_complexity(),
    tree_depth(),
    levels = 3
)

kable(head(tree_grid))
cost_complexity tree_depth
0.0e+00 1
3.2e-06 1
1.0e-01 1
0.0e+00 8
3.2e-06 8
1.0e-01 8
# tree_grid %>% count(cost_complexity)
# tree_grid %>% count(tree_depth)

# 5-fold cross-validation, stratified on the holiday flag.
set.seed(1234)
walmart_folds <- vfold_cv(train, v = 5, strata = is_holiday)

# walmart_folds

# Reuse the earlier recipe with the decision-tree specification.
tree_wf <- workflow() %>%
    add_model(tune_spec) %>%
    add_recipe(walmart_recipe)

# Evaluate every grid point on every fold.
walmart_treefit <- tune_grid(
    tree_wf,
    resamples = walmart_folds,
    grid = tree_grid
)
walmart_treefit
## # Tuning results
## # 5-fold cross-validation using stratification 
## # A tibble: 5 x 4
##   splits                 id    .metrics          .notes          
##   <list>                 <chr> <list>            <list>          
## 1 <split [337.3K/84.3K]> Fold1 <tibble [18 x 6]> <tibble [0 x 1]>
## 2 <split [337.3K/84.3K]> Fold2 <tibble [18 x 6]> <tibble [0 x 1]>
## 3 <split [337.3K/84.3K]> Fold3 <tibble [18 x 6]> <tibble [0 x 1]>
## 4 <split [337.3K/84.3K]> Fold4 <tibble [18 x 6]> <tibble [0 x 1]>
## 5 <split [337.3K/84.3K]> Fold5 <tibble [18 x 6]> <tibble [0 x 1]>
# First 12 rows of the per-candidate CV metrics (rmse and rsq).
walmart_treefit %>%
    collect_metrics() %>%
    slice_head(n = 12) %>%
    kable()
cost_complexity tree_depth .metric .estimator mean n std_err .config
0.0e+00 1 rmse standard 2.132038e+04 5 66.7289279 Preprocessor1_Model1
0.0e+00 1 rsq standard 1.187440e-01 5 0.0021923 Preprocessor1_Model1
3.2e-06 1 rmse standard 2.132038e+04 5 66.7289279 Preprocessor1_Model2
3.2e-06 1 rsq standard 1.187440e-01 5 0.0021923 Preprocessor1_Model2
1.0e-01 1 rmse standard 2.132038e+04 5 66.7289279 Preprocessor1_Model3
1.0e-01 1 rsq standard 1.187440e-01 5 0.0021923 Preprocessor1_Model3
0.0e+00 8 rmse standard 1.309780e+04 5 81.7228325 Preprocessor1_Model4
0.0e+00 8 rsq standard 6.673507e-01 5 0.0039582 Preprocessor1_Model4
3.2e-06 8 rmse standard 1.309840e+04 5 81.7990911 Preprocessor1_Model5
3.2e-06 8 rsq standard 6.673202e-01 5 0.0039591 Preprocessor1_Model5
1.0e-01 8 rmse standard 2.132038e+04 5 66.7289279 Preprocessor1_Model6
1.0e-01 8 rsq standard 1.187440e-01 5 0.0021923 Preprocessor1_Model6
# Pass the metric by name: the bare positional string form
# select_best("rmse") is deprecated in newer versions of tune.
best_tree <- walmart_treefit %>% 
    select_best(metric = "rmse")

best_tree
## # A tibble: 1 x 3
##   cost_complexity tree_depth .config             
##             <dbl>      <int> <chr>               
## 1    0.0000000001         15 Preprocessor1_Model7
# Inject the winning hyperparameters into the tree workflow.
final_walmart_tree <- finalize_workflow(tree_wf, best_tree)

final_walmart_tree
## == Workflow ====================================================================
## Preprocessor: Recipe
## Model: decision_tree()
## 
## -- Preprocessor ----------------------------------------------------------------
## 3 Recipe Steps
## 
## * step_date()
## * step_rm()
## * step_mutate()
## 
## -- Model -----------------------------------------------------------------------
## Decision Tree Model Specification (regression)
## 
## Main Arguments:
##   cost_complexity = 1e-10
##   tree_depth = 15
## 
## Computational engine: rpart
# Fit the finalized tree on the training partition, then score it
# on the held-out validation rows (rmse / rsq / mae).
walmart_treefit2 <- fit(final_walmart_tree, data = train_data)

walmart_treefit2 %>%
    predict(val_data) %>%
    bind_cols(val_data) %>%
    select(weekly_sales, .pred) %>%
    metrics(truth = weekly_sales, estimate = .pred)
## # A tibble: 3 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard    6951.   
## 2 rsq     standard       0.906
## 3 mae     standard    3100.

Attachment 2: ordinary linear model fitting

# Linear-model recipe: month features from `date`, factor-encode
# store/dept, then one-hot (dummy) encode all nominal predictors.
walmart_recipe2 <- recipe(weekly_sales ~ ., data = train_data) %>%
    step_date(date, features = c("month")) %>%
    step_rm(date) %>%
    step_mutate(
             store = as.factor(store),
             dept = as.factor(dept)
            # , date_year = as.factor(date_year)
            ) %>%
    step_dummy(all_nominal(), -all_outcomes()) %>%
    prep()

walmart_recipe2 %>% print()
## Data Recipe
## 
## Inputs:
## 
##       role #variables
##    outcome          1
##  predictor          4
## 
## Training data contained 295099 data points and no missing data.
## 
## Operations:
## 
## Date features from date [trained]
## Variables removed date [trained]
## Variable mutation for store, dept [trained]
## Dummy variables from store, dept, date_month [trained]
# Plain OLS via the lm engine, bundled with the dummy-encoding recipe.
lm_model <- linear_reg() %>%
    set_engine("lm") %>%
    set_mode("regression")

walmart_wflow2 <- workflow() %>%
    add_model(lm_model) %>%
    add_recipe(walmart_recipe2)

walmart_wflow2
## == Workflow ====================================================================
## Preprocessor: Recipe
## Model: linear_reg()
## 
## -- Preprocessor ----------------------------------------------------------------
## 4 Recipe Steps
## 
## * step_date()
## * step_rm()
## * step_mutate()
## * step_dummy()
## 
## -- Model -----------------------------------------------------------------------
## Linear Regression Model Specification (regression)
## 
## Computational engine: lm
# Fit the lm workflow on the training partition and evaluate on the
# validation partition.
walmart_lmfit <- fit(walmart_wflow2, train_data)

walmart_lmfit %>%
    predict(val_data) %>%
    bind_cols(val_data) %>%
    select(weekly_sales, .pred) %>%
    metrics(truth = weekly_sales, estimate = .pred)
## # A tibble: 3 x 3
##   .metric .estimator    .estimate
##   <chr>   <chr>             <dbl>
## 1 rmse    standard   22704.      
## 2 rsq     standard       0.000238
## 3 mae     standard   15175.

Why does tidying the workflow fit show only the intercept and is_holiday, while fitting lm() directly on the baked data estimates all the dummy coefficients?

walmart_lmfit %>% tidy()
## # A tibble: 2 x 5
##   term           estimate std.error statistic  p.value
##   <chr>             <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)      15926.      43.4    367.   0.      
## 2 is_holidayTRUE    1034.     163.       6.32 2.55e-10
# Refit the same linear model directly with lm() on the baked data.
untidy_fit <- lm(weekly_sales ~ ., data = juice(walmart_recipe2))

# predict.lm() returns a bare numeric vector; wrap it in a named
# tibble column instead of letting bind_cols() auto-name it `...1`
# (which also triggers the noisy "New names:" message).
tibble(.pred = predict(untidy_fit,
                       newdata = bake(walmart_recipe2,
                                      new_data = val_data))) %>%
    bind_cols(val_data) %>%
    select(weekly_sales, .pred) %>%
    metrics(truth = weekly_sales, estimate = .pred)
## New names:
## * NA -> ...1
## # A tibble: 3 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard   13184.   
## 2 rsq     standard       0.663
## 3 mae     standard    8115.
untidy_fit %>% summary()
## 
## Call:
## lm(formula = weekly_sales ~ ., data = juice(walmart_recipe2))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63240  -5837   -493   5039 599054 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     25090.8      270.8  92.642  < 2e-16 ***
## is_holidayTRUE    493.8      105.4   4.685 2.81e-06 ***
## store_X2         5083.3      221.5  22.951  < 2e-16 ***
## store_X3       -16541.3      229.5 -72.077  < 2e-16 ***
## store_X4         7388.5      220.9  33.445  < 2e-16 ***
## store_X5       -17936.4      229.0 -78.323  < 2e-16 ***
## store_X6         -141.4      221.8  -0.637 0.523834    
## store_X7       -14143.6      224.5 -62.996  < 2e-16 ***
## store_X8        -9085.4      224.0 -40.562  < 2e-16 ***
## store_X9       -14344.5      229.4 -62.540  < 2e-16 ***
## store_X10        4645.8      221.5  20.972  < 2e-16 ***
## store_X11       -2849.6      222.2 -12.822  < 2e-16 ***
## store_X12       -7511.1      225.2 -33.351  < 2e-16 ***
## store_X13        5857.6      220.2  26.600  < 2e-16 ***
## store_X14        6819.1      222.2  30.695  < 2e-16 ***
## store_X15      -13441.5      223.3 -60.207  < 2e-16 ***
## store_X16      -14912.9      226.4 -65.866  < 2e-16 ***
## store_X17       -8975.5      224.2 -40.040  < 2e-16 ***
## store_X18       -6593.1      223.8 -29.466  < 2e-16 ***
## store_X19       -1710.2      222.3  -7.692 1.46e-14 ***
## store_X20        7736.9      221.2  34.969  < 2e-16 ***
## store_X21      -11205.7      225.1 -49.771  < 2e-16 ***
## store_X22       -7312.6      224.9 -32.513  < 2e-16 ***
## store_X23       -2373.7      222.9 -10.649  < 2e-16 ***
## store_X24       -3048.9      221.2 -13.783  < 2e-16 ***
## store_X25      -11934.5      224.4 -53.188  < 2e-16 ***
## store_X26       -7963.1      223.2 -35.674  < 2e-16 ***
## store_X27        2876.4      221.1  13.012  < 2e-16 ***
## store_X28       -3450.9      221.9 -15.555  < 2e-16 ***
## store_X29      -14683.4      226.2 -64.918  < 2e-16 ***
## store_X30      -17858.8      245.7 -72.691  < 2e-16 ***
## store_X31       -2371.0      222.0 -10.681  < 2e-16 ***
## store_X32       -5640.6      222.0 -25.413  < 2e-16 ***
## store_X33      -21810.0      252.3 -86.459  < 2e-16 ***
## store_X34       -9118.7      222.0 -41.084  < 2e-16 ***
## store_X35       -8707.3      225.7 -38.571  < 2e-16 ***
## store_X36      -19513.9      255.1 -76.490  < 2e-16 ***
## store_X37      -16330.6      245.4 -66.555  < 2e-16 ***
## store_X38      -19134.1      242.1 -79.037  < 2e-16 ***
## store_X39       -1616.6      223.8  -7.224 5.06e-13 ***
## store_X40       -8457.0      223.3 -37.878  < 2e-16 ***
## store_X41       -4139.1      222.2 -18.630  < 2e-16 ***
## store_X42      -15221.7      247.0 -61.635  < 2e-16 ***
## store_X43      -13879.0      249.8 -55.552  < 2e-16 ***
## store_X44      -20885.3      244.4 -85.461  < 2e-16 ***
## store_X45      -10919.7      224.6 -48.623  < 2e-16 ***
## dept_X2         24232.1      280.9  86.263  < 2e-16 ***
## dept_X3         -7273.0      278.8 -26.083  < 2e-16 ***
## dept_X4          6610.7      279.6  23.644  < 2e-16 ***
## dept_X5          1844.9      280.0   6.589 4.44e-11 ***
## dept_X6        -15456.0      285.3 -54.178  < 2e-16 ***
## dept_X7          4948.2      280.4  17.647  < 2e-16 ***
## dept_X8         10888.0      280.2  38.861  < 2e-16 ***
## dept_X9           756.5      280.8   2.694 0.007062 ** 
## dept_X10        -1038.8      279.1  -3.723 0.000197 ***
## dept_X11        -4848.4      279.6 -17.340  < 2e-16 ***
## dept_X12       -15207.8      281.1 -54.101  < 2e-16 ***
## dept_X13        11268.8      279.3  40.348  < 2e-16 ***
## dept_X14        -4346.8      280.4 -15.504  < 2e-16 ***
## dept_X16        -5002.2      280.0 -17.863  < 2e-16 ***
## dept_X17        -9188.3      280.7 -32.739  < 2e-16 ***
## dept_X18       -12584.2      298.2 -42.197  < 2e-16 ***
## dept_X19       -20534.7      317.0 -64.774  < 2e-16 ***
## dept_X20       -14351.5      285.1 -50.334  < 2e-16 ***
## dept_X21       -14343.8      279.8 -51.263  < 2e-16 ***
## dept_X22       -10236.7      291.0 -35.184  < 2e-16 ***
## dept_X23         3477.7      288.3  12.061  < 2e-16 ***
## dept_X24       -14924.6      292.8 -50.969  < 2e-16 ***
## dept_X25       -10514.0      281.0 -37.413  < 2e-16 ***
## dept_X26       -12889.3      288.5 -44.678  < 2e-16 ***
## dept_X27       -19332.6      290.3 -66.591  < 2e-16 ***
## dept_X28       -19370.0      284.7 -68.048  < 2e-16 ***
## dept_X29       -15834.6      294.0 -53.859  < 2e-16 ***
## dept_X30       -17608.8      295.4 -59.619  < 2e-16 ***
## dept_X31       -17679.0      284.9 -62.064  < 2e-16 ***
## dept_X32       -13391.3      285.3 -46.934  < 2e-16 ***
## dept_X33       -14885.8      294.4 -50.570  < 2e-16 ***
## dept_X34        -5817.9      293.0 -19.857  < 2e-16 ***
## dept_X35       -18717.7      295.6 -63.327  < 2e-16 ***
## dept_X36       -19454.5      296.2 -65.687  < 2e-16 ***
## dept_X37       -22472.6      371.8 -60.440  < 2e-16 ***
## dept_X38        41709.7      279.6 149.156  < 2e-16 ***
## dept_X39       -27522.3     4011.5  -6.861 6.86e-12 ***
## dept_X40        25687.4      279.4  91.941  < 2e-16 ***
## dept_X41       -19341.8      292.8 -66.060  < 2e-16 ***
## dept_X42       -14269.7      280.4 -50.898  < 2e-16 ***
## dept_X43       -15939.3     4702.3  -3.390 0.000700 ***
## dept_X44       -16434.7      291.1 -56.451  < 2e-16 ***
## dept_X45       -22380.9      411.8 -54.342  < 2e-16 ***
## dept_X46          678.9      279.8   2.427 0.015243 *  
## dept_X47       -22996.0      651.2 -35.314  < 2e-16 ***
## dept_X48       -23796.4      431.4 -55.157  < 2e-16 ***
## dept_X49       -14453.3      310.1 -46.613  < 2e-16 ***
## dept_X50       -23276.4      443.7 -52.457  < 2e-16 ***
## dept_X51       -21451.5      468.7 -45.773  < 2e-16 ***
## dept_X52       -17846.1      280.6 -63.592  < 2e-16 ***
## dept_X54       -21954.7      303.4 -72.360  < 2e-16 ***
## dept_X55       -10065.6      290.9 -34.604  < 2e-16 ***
## dept_X56       -16540.8      285.8 -57.885  < 2e-16 ***
## dept_X58       -19491.4      311.5 -62.569  < 2e-16 ***
## dept_X59       -19164.2      284.2 -67.443  < 2e-16 ***
## dept_X60       -18905.6      288.2 -65.604  < 2e-16 ***
## dept_X65        27675.5     1345.7  20.565  < 2e-16 ***
## dept_X67       -11926.9      279.3 -42.698  < 2e-16 ***
## dept_X71       -15833.4      294.4 -53.777  < 2e-16 ***
## dept_X72        30454.2      284.8 106.920  < 2e-16 ***
## dept_X74        -5505.1      279.8 -19.673  < 2e-16 ***
## dept_X77       -23986.3     1324.6 -18.108  < 2e-16 ***
## dept_X78       -23337.9     1050.2 -22.222  < 2e-16 ***
## dept_X79         2659.6      280.4   9.487  < 2e-16 ***
## dept_X80        -7658.5      286.4 -26.742  < 2e-16 ***
## dept_X81        -3813.2      279.4 -13.648  < 2e-16 ***
## dept_X82        -3908.7      279.8 -13.968  < 2e-16 ***
## dept_X83       -16728.4      286.7 -58.343  < 2e-16 ***
## dept_X85       -17751.2      284.2 -62.450  < 2e-16 ***
## dept_X87        -5951.1      281.1 -21.171  < 2e-16 ***
## dept_X90        25886.5      280.0  92.449  < 2e-16 ***
## dept_X91        14431.8      279.7  51.597  < 2e-16 ***
## dept_X92        55986.5      280.1 199.879  < 2e-16 ***
## dept_X93         6950.4      286.0  24.300  < 2e-16 ***
## dept_X94        13621.8      289.2  47.109  < 2e-16 ***
## dept_X95        50308.4      280.7 179.203  < 2e-16 ***
## dept_X96        -3756.1      302.1 -12.431  < 2e-16 ***
## dept_X97        -5230.7      282.1 -18.544  < 2e-16 ***
## dept_X98       -13135.6      288.0 -45.605  < 2e-16 ***
## dept_X99       -25191.3      575.5 -43.774  < 2e-16 ***
## date_month_X2    1829.8      136.3  13.420  < 2e-16 ***
## date_month_X3    1423.9      131.8  10.806  < 2e-16 ***
## date_month_X4    1646.5      130.1  12.654  < 2e-16 ***
## date_month_X5    1639.7      134.0  12.240  < 2e-16 ***
## date_month_X6    2178.2      131.9  16.512  < 2e-16 ***
## date_month_X7    1747.7      130.0  13.441  < 2e-16 ***
## date_month_X8    1995.6      131.9  15.124  < 2e-16 ***
## date_month_X9     861.5      134.0   6.430 1.28e-10 ***
## date_month_X10   1154.7      131.9   8.756  < 2e-16 ***
## date_month_X11   3384.2      149.0  22.708  < 2e-16 ***
## date_month_X12   5366.8      140.1  38.295  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13280 on 294962 degrees of freedom
## Multiple R-squared:  0.6581, Adjusted R-squared:  0.6579 
## F-statistic:  4174 on 136 and 294962 DF,  p-value: < 2.2e-16